Rough clustering for data in brief

Aim

To generate some of the figures for data in brief, do a rough PCA, clustering and UMAP

Dimension reduction and visualisation

Honeycomb HIVE v1

Read data

# Load the saved HIVE object (the file name suggests a SingleCellExperiment
# with QC metrics already attached -- TODO confirm).
hive <- readRDS(
  file = "/stornext/Projects/GenomicsRnD/brown.d/G000254_fixed_neutrophil/HIVE/hive_qcMetrics.sce"
)

Low-quality cells have to be removed first; otherwise downstream steps fail due to missing values.

# Flag genes whose row name starts with "MT-", compute per-cell QC metrics
# with that gene subset, and keep only the cells not marked for discard.
is.mito <- grepl(pattern = "^MT-", x = rownames(hive))
qcstats <- perCellQCMetrics(hive, subsets = list(Mito = is.mito))
filtered <- quickPerCellQC(
  qcstats,
  percent_subsets = "subsets_Mito_percent"
)
hive <- hive[, !filtered$discard]

Normalisation and feature selection

# Normalisation: compute log-normalised expression values.
hive <- logNormCounts(hive)

# Feature selection: model the per-gene variance, then take the top
# fraction (prop = 0.1) of genes as highly variable genes (HVGs).
dec <- modelGeneVar(hive)
hvg <- getTopHVGs(stats = dec, prop = 0.1)

Principal component analysis

# PCA restricted to the highly variable genes, keeping 12 components.
# The seed is fixed immediately beforehand so the result is repeatable.
set.seed(1234)
hive <- runPCA(hive, ncomponents = 12, subset_row = hvg)

# Clustering on the PCA coordinates with a nearest-neighbour graph and
# Louvain community detection; the result is stored as the column labels.
library(bluster)
colLabels(hive) <- clusterCells(
  hive,
  use.dimred = "PCA",
  BLUSPARAM = NNGraphParam(cluster.fun = "louvain")
)

UMAP

# Compute the UMAP embedding from the stored PCA coordinates.
# NOTE(review): no seed is set in this chunk; reproducibility of the layout
# presumably relies on the set.seed(1234) call in the PCA chunk and on the
# chunks being run in order -- confirm.
hive <- runUMAP(hive, dimred = "PCA")

# Optional cache of the processed object (currently disabled).
#saveRDS(hive, "/vast/scratch/users/brown.d/data_in_brief/hive_umap.rds")

Visualise UMAP

By cluster label

# Store `fixed` as a factor; it is the colouring variable of the batch plot
# further down.
hive$fixed <- as.factor(hive$fixed)

# UMAP coloured by cluster label, with blank axis titles.
p1 <- plotUMAP(hive, colour_by = "label", rasterise = TRUE) +
  labs(x = "", y = "") +
  theme_Publication()

p1

By library size

# UMAP coloured by the natural log of the per-cell library size (`sum`).
hive$LibSize <- log(hive$sum)

# The legend title and the legend-key overrides are set in ONE guide_legend()
# call. Adding a second guides() for the same aesthetic replaces the earlier
# guide, which previously discarded the "Log library size" title on the
# printed plot.
p2 <- plotUMAP(hive, colour_by = "LibSize", rasterise = TRUE) +
  labs(x = "", y = "") +
  guides(colour = guide_legend(
    title = "Log library \nsize",
    override.aes = list(size = 2, alpha = 1)
  )) +
  theme_Publication()

# Shrink the plotted points; the legend keys stay large and opaque through
# override.aes above.
p2$layers[[1]]$aes_params$size <- 0.1
p2

By batch

The 2 batches are the different cell inputs into a distinct hive device.

# UMAP coloured by the `fixed` annotation, labelled "Batch" in the legend.
# The title and the legend-key overrides are set in ONE guide_legend() call:
# a later guides() for the same aesthetic replaces the earlier guide, so
# splitting them silently dropped the "Batch" title.
p3 <- plotUMAP(hive, colour_by = "fixed", rasterise = TRUE) +
  labs(x = "", y = "") +
  guides(colour = guide_legend(
    title = "Batch",
    override.aes = list(size = 2, alpha = 1)
  )) +
  theme_Publication()

# Small, translucent points on the plot; the legend keys stay large and
# opaque through override.aes above.
p3$layers[[1]]$aes_params$alpha <- 0.1
p3$layers[[1]]$aes_params$size <- 0.1
p3

10x Genomics Flex

Read data

# Load the saved Flex object (the file name suggests a SingleCellExperiment
# with QC metrics already attached -- TODO confirm).
flex <- readRDS(
  file = "/stornext/Projects/GenomicsRnD/brown.d/G000254_fixed_neutrophil/SCEs/flex_qcMetrics.sce"
)

Low-quality cells have to be removed first; otherwise downstream steps fail due to missing values.

# Flag genes whose row name starts with "MT-", compute per-cell QC metrics
# with that gene subset, and keep only the cells not marked for discard.
is.mito <- grepl(pattern = "^MT-", x = rownames(flex))
qcstats <- perCellQCMetrics(flex, subsets = list(Mito = is.mito))
filtered <- quickPerCellQC(
  qcstats,
  percent_subsets = "subsets_Mito_percent"
)
flex <- flex[, !filtered$discard]

Normalisation and feature selection

# Normalisation: compute log-normalised expression values.
flex <- logNormCounts(flex)

# Feature selection: model the per-gene variance, then take the top
# fraction (prop = 0.1) of genes as highly variable genes (HVGs).
dec <- modelGeneVar(flex)
hvg <- getTopHVGs(stats = dec, prop = 0.1)

Principal component analysis

# PCA restricted to the highly variable genes, keeping 12 components.
# The seed is fixed immediately beforehand so the result is repeatable.
set.seed(1234)
flex <- runPCA(flex, ncomponents = 12, subset_row = hvg)

# Clustering on the PCA coordinates with a nearest-neighbour graph and
# Louvain community detection; the result is stored as the column labels.
colLabels(flex) <- clusterCells(
  flex,
  use.dimred = "PCA",
  BLUSPARAM = NNGraphParam(cluster.fun = "louvain")
)

UMAP

# Compute the UMAP embedding from the stored PCA coordinates.
# NOTE(review): no seed is set in this chunk; reproducibility of the layout
# presumably relies on the set.seed(1234) call in the PCA chunk and on the
# chunks being run in order -- confirm.
flex <- runUMAP(flex, dimred = "PCA")

# Optional cache of the processed object (write and re-read both disabled).
#saveRDS(flex, "/vast/scratch/users/brown.d/data_in_brief/flex_umap.rds")
#flex <- readRDS("/vast/scratch/users/brown.d/data_in_brief/flex_umap.rds")

Visualise UMAP

By cluster label

# UMAP coloured by cluster label, with blank axis titles.
p1 <- plotUMAP(flex, colour_by = "label", rasterise = TRUE) +
  labs(x = "", y = "") +
  theme_Publication()

p1

By library size

# UMAP coloured by the natural log of the per-cell library size (`sum`).
flex$LibSize <- log(flex$sum)

# The legend title and the legend-key overrides are set in ONE guide_legend()
# call. Adding a second guides() for the same aesthetic replaces the earlier
# guide, which previously discarded the "Log library size" title on the
# printed plot.
p2 <- plotUMAP(flex, colour_by = "LibSize", rasterise = TRUE) +
  labs(x = "", y = "") +
  guides(colour = guide_legend(
    title = "Log library \nsize",
    override.aes = list(size = 2, alpha = 1)
  )) +
  theme_Publication()

# Shrink the plotted points; the legend keys stay large and opaque through
# override.aes above.
p2$layers[[1]]$aes_params$size <- 0.1
p2

By batch

The 2 batches are the different cell inputs into a distinct flex device.

# UMAP coloured by the `Batch` annotation, with blank axis titles and
# enlarged, fully opaque legend keys.
p3 <- plotUMAP(flex, colour_by = "Batch", rasterise = TRUE) +
  labs(x = "", y = "") +
  guides(colour = guide_legend(override.aes = list(size = 2, alpha = 1))) +
  theme_Publication()

# Small, translucent points on the plot itself.
p3$layers[[1]]$aes_params$size <- 0.1
p3$layers[[1]]$aes_params$alpha <- 0.1
p3

Session Info

# Record the R version, platform and package versions used for this run.
utils::sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /stornext/System/data/tools/openBLAS/openBLAS-0.3.23-gcc-11.3.0/lib/libopenblas_haswellp-r0.3.23.so;  LAPACK version 3.11.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] bluster_1.12.0              ggthemes_5.1.0             
##  [3] lubridate_1.9.3             forcats_1.0.0              
##  [5] stringr_1.5.1               dplyr_1.1.4                
##  [7] purrr_1.0.2                 readr_2.1.5                
##  [9] tidyr_1.3.1                 tibble_3.2.1               
## [11] tidyverse_2.0.0             knitr_1.45                 
## [13] scran_1.30.2                scater_1.30.1              
## [15] ggplot2_3.5.0               scuttle_1.12.0             
## [17] SingleCellExperiment_1.24.0 SummarizedExperiment_1.32.0
## [19] Biobase_2.62.0              GenomicRanges_1.54.1       
## [21] GenomeInfoDb_1.38.6         IRanges_2.36.0             
## [23] S4Vectors_0.40.2            BiocGenerics_0.48.1        
## [25] MatrixGenerics_1.14.0       matrixStats_1.2.0          
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7              gridExtra_2.3            
##  [3] rlang_1.1.3               magrittr_2.0.3           
##  [5] RcppAnnoy_0.0.22          compiler_4.3.2           
##  [7] DelayedMatrixStats_1.24.0 vctrs_0.6.5              
##  [9] pkgconfig_2.0.3           crayon_1.5.2             
## [11] fastmap_1.1.1             XVector_0.42.0           
## [13] labeling_0.4.3            utf8_1.2.4               
## [15] rmarkdown_2.25            tzdb_0.4.0               
## [17] ggbeeswarm_0.7.2          xfun_0.42                
## [19] zlibbioc_1.48.0           cachem_1.0.8             
## [21] beachmat_2.18.1           jsonlite_1.8.8           
## [23] highr_0.10                DelayedArray_0.28.0      
## [25] BiocParallel_1.36.0       irlba_2.3.5.1            
## [27] parallel_4.3.2            cluster_2.1.6            
## [29] R6_2.5.1                  bslib_0.6.1              
## [31] stringi_1.8.3             limma_3.58.1             
## [33] jquerylib_0.1.4           Rcpp_1.0.12              
## [35] Matrix_1.6-1              igraph_1.6.0             
## [37] timechange_0.3.0          tidyselect_1.2.0         
## [39] rstudioapi_0.15.0         abind_1.4-5              
## [41] yaml_2.3.8                viridis_0.6.5            
## [43] codetools_0.2-19          lattice_0.22-5           
## [45] withr_3.0.0               ggrastr_1.0.2            
## [47] evaluate_0.23             pillar_1.9.0             
## [49] generics_0.1.3            rprojroot_2.0.4          
## [51] RCurl_1.98-1.14           hms_1.1.3                
## [53] sparseMatrixStats_1.14.0  munsell_0.5.0            
## [55] scales_1.3.0              glue_1.7.0               
## [57] metapod_1.10.1            tools_4.3.2              
## [59] BiocNeighbors_1.20.2      ScaledMatrix_1.10.0      
## [61] locfit_1.5-9.8            Cairo_1.6-2              
## [63] cowplot_1.1.3             edgeR_4.0.16             
## [65] colorspace_2.1-0          GenomeInfoDbData_1.2.11  
## [67] beeswarm_0.4.0            BiocSingular_1.18.0      
## [69] vipor_0.4.7               cli_3.6.2                
## [71] rsvd_1.0.5                fansi_1.0.6              
## [73] S4Arrays_1.2.0            viridisLite_0.4.2        
## [75] uwot_0.1.16               gtable_0.3.4             
## [77] sass_0.4.8                digest_0.6.34            
## [79] SparseArray_1.2.4         ggrepel_0.9.5            
## [81] dqrng_0.3.2               farver_2.1.1             
## [83] htmltools_0.5.7           lifecycle_1.0.4          
## [85] here_1.0.1                statmod_1.5.0